import urllib.robotparser
import requests

# Fetch a Taobao item page only if robots.txt allows it, and save the HTML.
rp = urllib.robotparser.RobotFileParser()
rp.set_url("https://item.taobao.com/robots.txt")
rp.read()

# Simulate a search-engine crawler user agent.
# useragent = 'Baiduspider'
useragent = 'Googlebot'
url = 'https://item.taobao.com/item.htm?spm=a21bo.21814703.201876.55.5af911d9OB9RpM&id=632402246672&scm=1007.34127.211940.0&pvid=a8afba24-6cbe-4eb1-a019-615f5ad3dd19'
# Use the parsed robots.txt to decide whether this user agent may fetch the url.
if rp.can_fetch(useragent, url):
    # Send the SAME User-Agent that was checked against robots.txt — otherwise
    # the permission check (Googlebot) and the actual request (python-requests)
    # would disagree. A timeout prevents the script from hanging forever.
    response = requests.get(url, headers={'User-Agent': useragent}, timeout=10)
    # Fail loudly on HTTP errors (4xx/5xx) instead of silently saving an error page.
    response.raise_for_status()
    # 'with' guarantees the file is closed even if the write raises.
    with open("bd-html", "wb") as fb:
        fb.write(response.content)
